BA2_FITNESS_DATA=readRDS(file="LR_IMMUNO_FITNESS_DATA_TRAPP_SUBVARS.rds") %>% filter(Variant=="BA2")
BA2_MIRA_DATASET  = fread(cmd = 'unzip -p MIRA_DATA_FULL.txt.zip')%>% mutate(Omicron_mutated = ifelse(Peptide %in% BA2_FITNESS_DATA$Peptide, "OMICRON_MUTATED","WT"))%>% mutate(Variant="BA2")

BA4_FITNESS_DATA=readRDS(file="LR_IMMUNO_FITNESS_DATA_TRAPP_SUBVARS.rds") %>% filter(Variant=="BA4")
BA4_MIRA_DATASET  = fread(cmd = 'unzip -p MIRA_DATA_FULL.txt.zip')%>% mutate(Omicron_mutated = ifelse(Peptide %in% BA4_FITNESS_DATA$Peptide, "OMICRON_MUTATED","WT"))%>% mutate(Variant="BA4")

BA5_FITNESS_DATA=readRDS(file="LR_IMMUNO_FITNESS_DATA_TRAPP_SUBVARS.rds") %>% filter(Variant=="BA5")
BA5_MIRA_DATASET  = fread(cmd = 'unzip -p MIRA_DATA_FULL.txt.zip')%>% mutate(Omicron_mutated = ifelse(Peptide %in% BA5_FITNESS_DATA$Peptide, "OMICRON_MUTATED","WT"))%>% mutate(Variant="BA5")

MIRA_DATASET = rbind(BA2_MIRA_DATASET, BA4_MIRA_DATASET, BA5_MIRA_DATASET)

CLASS I Only and Get rid unproductive TCRs

MIRA_DATASET=MIRA_DATASET %>% filter(! TCR %in% grep("unproductive",TCR, value = TRUE))
MIRA_DATASET=MIRA_DATASET %>% filter(Data_Class == "ClassI")
MIRA_DATASET=MIRA_DATASET%>% filter(! Cohort %in% "Healthy (No known exposure)")

How many patients per cohort

#Determine each patient and cohort combo
PATIENTS_AND_COHORTS=MIRA_DATASET %>% select(Experiment, Cohort) %>%unique
PATIENTS_AND_COHORTS %>% group_by(Cohort) %>% dplyr::summarise(n=n())%>%arrange(desc(n))%>%
    ggbarplot(x="Cohort",y="n",label=T,fill="Cohort")+ylab("Number of Patients")+theme(legend.position = "none")+coord_flip()

MIRA_DATASET %>% select(Cohort,TCR) %>% group_by(Cohort) %>% dplyr::summarise(n=n_distinct(TCR))%>%
    ggbarplot(x="Cohort",y="n",fill="Cohort")+ylab("Number of Unique TCRs")+theme(legend.position = "none")+coord_flip()

How many observations are mutated/not out of entire mira data?

VARIANT="BA2"
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Omicron_mutated,Variant) %>% table
##                  Variant
## Omicron_mutated      BA2
##   OMICRON_MUTATED   4255
##   WT              107682
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Omicron_mutated,Variant) %>% table %>% prop.table()
##                  Variant
## Omicron_mutated          BA2
##   OMICRON_MUTATED 0.03801245
##   WT              0.96198755
VARIANT="BA4"
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Omicron_mutated,Variant) %>% table
##                  Variant
## Omicron_mutated      BA4
##   OMICRON_MUTATED  11548
##   WT              100389
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Omicron_mutated,Variant) %>% table %>% prop.table()
##                  Variant
## Omicron_mutated         BA4
##   OMICRON_MUTATED 0.1031652
##   WT              0.8968348
VARIANT="BA5"
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Omicron_mutated,Variant) %>% table
##                  Variant
## Omicron_mutated      BA5
##   OMICRON_MUTATED   4449
##   WT              107488
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Omicron_mutated,Variant) %>% table %>% prop.table()
##                  Variant
## Omicron_mutated          BA5
##   OMICRON_MUTATED 0.03974557
##   WT              0.96025443
MIRA_DATASET %>% group_by(Omicron_mutated,Variant) %>% dplyr::summarise(n=n())%>% group_by(Variant)%>%mutate(Freq=n/sum(n))%>%
        ggbarplot(x="Variant",y="Freq",fill="Omicron_mutated",position = position_dodge())
## `summarise()` has grouped output by 'Omicron_mutated'. You can override using the `.groups` argument.

What % of patient TCR-Epitope repertoires are mutated?

Class I only

VARIANT="BA2"
MIRA_DATASET %>% filter(Variant == VARIANT)%>% filter(Data_Class == "ClassI")%>% group_by(Experiment,Cohort,Omicron_mutated) %>% dplyr::summarise(n=n())%>% mutate(Freq=n/sum(n))%>% ungroup%>%
    tidyr::complete_(cols = c("Experiment", "Omicron_mutated"), fill = list(n=0, Freq=0))%>%
    filter(Omicron_mutated == "OMICRON_MUTATED")%>% arrange(desc(Freq))%>%
    ggbarplot(x="Experiment",y="Freq",fill="Cohort")+rotate_x_text()+ggtitle(VARIANT)
## Warning: `complete_()` was deprecated in tidyr 1.0.0.
## Please use `complete()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## `summarise()` has grouped output by 'Experiment', 'Cohort'. You can override using the `.groups` argument.

VARIANT="BA4"
MIRA_DATASET %>% filter(Variant == VARIANT)%>% filter(Data_Class == "ClassI")%>% group_by(Experiment,Cohort,Omicron_mutated) %>% dplyr::summarise(n=n())%>% mutate(Freq=n/sum(n))%>% ungroup%>%
    tidyr::complete_(cols = c("Experiment", "Omicron_mutated"), fill = list(n=0, Freq=0))%>%
    filter(Omicron_mutated == "OMICRON_MUTATED")%>% arrange(desc(Freq))%>%
    ggbarplot(x="Experiment",y="Freq",fill="Cohort")+rotate_x_text()+ggtitle(VARIANT)
## `summarise()` has grouped output by 'Experiment', 'Cohort'. You can override using the `.groups` argument.

VARIANT="BA5"
MIRA_DATASET %>% filter(Variant == VARIANT)%>% filter(Data_Class == "ClassI")%>% group_by(Experiment,Cohort,Omicron_mutated) %>% dplyr::summarise(n=n())%>% mutate(Freq=n/sum(n))%>% ungroup%>%
    tidyr::complete_(cols = c("Experiment", "Omicron_mutated"), fill = list(n=0, Freq=0))%>%
    filter(Omicron_mutated == "OMICRON_MUTATED")%>% arrange(desc(Freq))%>%
    ggbarplot(x="Experiment",y="Freq",fill="Cohort")+rotate_x_text()+ggtitle(VARIANT)
## `summarise()` has grouped output by 'Experiment', 'Cohort'. You can override using the `.groups` argument.

What proportion of epitope repertoire are affected?

VARIANT="BA2"
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Experiment, Peptide,Cohort, Omicron_mutated)%>% distinct() %>% group_by(Experiment,Cohort, Omicron_mutated)%>% dplyr::summarise(n=n())%>% mutate(Freq=n/sum(n))%>%ungroup %>%
    tidyr::complete_(cols = c("Experiment", "Omicron_mutated"), fill = list(n=0, Freq=0))%>%
    filter(Omicron_mutated == "OMICRON_MUTATED")%>% arrange(desc(Freq))%>%
    ggbarplot(x="Experiment",y="Freq",fill="Cohort")+rotate_x_text()+ggtitle(VARIANT)
## `summarise()` has grouped output by 'Experiment', 'Cohort'. You can override using the `.groups` argument.

VARIANT="BA4"
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Experiment, Peptide,Cohort, Omicron_mutated)%>% distinct() %>% group_by(Experiment,Cohort, Omicron_mutated)%>% dplyr::summarise(n=n())%>% mutate(Freq=n/sum(n))%>%ungroup %>%
    tidyr::complete_(cols = c("Experiment", "Omicron_mutated"), fill = list(n=0, Freq=0))%>%
    filter(Omicron_mutated == "OMICRON_MUTATED")%>% arrange(desc(Freq))%>%
    ggbarplot(x="Experiment",y="Freq",fill="Cohort")+rotate_x_text()+ggtitle(VARIANT)
## `summarise()` has grouped output by 'Experiment', 'Cohort'. You can override using the `.groups` argument.

VARIANT="BA5"
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Experiment, Peptide,Cohort, Omicron_mutated)%>% distinct() %>% group_by(Experiment,Cohort, Omicron_mutated)%>% dplyr::summarise(n=n())%>% mutate(Freq=n/sum(n))%>%ungroup %>%
    tidyr::complete_(cols = c("Experiment", "Omicron_mutated"), fill = list(n=0, Freq=0))%>%
    filter(Omicron_mutated == "OMICRON_MUTATED")%>% arrange(desc(Freq))%>%
    ggbarplot(x="Experiment",y="Freq",fill="Cohort")+rotate_x_text()+ggtitle(VARIANT)
## `summarise()` has grouped output by 'Experiment', 'Cohort'. You can override using the `.groups` argument.

FITNESS_DATA_FULL = readRDS("LR_IMMUNO_FITNESS_DATA_TRAPP_SUBVARS.rds")
FITNESS_DATA_FULL=FITNESS_DATA_FULL %>% mutate(MT_ImmunogenicityPrediction = ifelse(OmicronPrediction<0.75, "Negative","Positive")) %>% mutate(WT_ImmunogenicityPrediction = ifelse(WuhanPrediction<0.75, "Negative","Positive"))

LR_IMMUNO=readRDS("LR_IMMUNO_FITNESS_DATA_PAN_PEPTIDE_SUBVAR.rds")%>% select(Peptide,VariantAlignment,LR_Immuno,Variant)
NON_MUTATED_LR_IMMUNO = data.table(Peptide=unique(MIRA_DATASET$Peptide),
                                   VariantAlignment="NA",
                                    LR_Immuno=0,
                                    Variant = c("BA2","BA4","BA5"))
NON_MUTATED_LR_IMMUNO=NON_MUTATED_LR_IMMUNO %>% filter(!Peptide %in% LR_IMMUNO$Peptide)
LR_IMMUNO=LR_IMMUNO %>% rbind(NON_MUTATED_LR_IMMUNO)
#LR_IMMUNO=LR_IMMUNO %>% mutate(LossPMHC = ifelse(Peptide %in% LOSS_PMHC, TRUE, FALSE))
## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be summariezed
##   groupvars: a vector containing names of columns that contain grouping variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default is 95%)
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
    library(plyr)

    # New version of length which can handle NA's: if na.rm==T, don't count them
    length2 <- function (x, na.rm=FALSE) {
        if (na.rm) sum(!is.na(x))
        else       length(x)
    }

    # This does the summary. For each group's data frame, return a vector with
    # N, mean, and sd
    datac <- ddply(data, groupvars, .drop=.drop,
      .fun = function(xx, col) {
        c(N    = length2(xx[[col]], na.rm=na.rm),
          mean = mean   (xx[[col]], na.rm=na.rm),
          sd   = sd     (xx[[col]], na.rm=na.rm)
        )
      },
      measurevar
    )

    # Rename the "mean" column
    datac <- rename(datac, c("mean" = measurevar))

    datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean

    # Confidence interval multiplier for standard error
    # Calculate t-statistic for confidence interval:
    # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
    ciMult <- qt(conf.interval/2 + .5, datac$N-1)
    datac$ci <- datac$se * ciMult

    return(datac)
}
COHORT_LIST = MIRA_DATASET %>% select(Experiment,Cohort) %>% distinct()

VARIANT="BA2"
ERROR_DATASET <- summarySE(MIRA_DATASET %>% filter(Variant==VARIANT) %>% inner_join(LR_IMMUNO), measurevar="LR_Immuno", groupvars=c("Experiment"))
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
## 
##     compact
## The following object is masked from 'package:XVector':
## 
##     compact
## The following object is masked from 'package:IRanges':
## 
##     desc
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:ggpubr':
## 
##     mutate
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## Joining, by = c("Peptide", "Variant")
TCR_DT=ERROR_DATASET
LR_IMMUNO_PLT_MIRA_BA2=TCR_DT %>% inner_join(COHORT_LIST)%>%
  ggplot(aes(x=reorder(Experiment, -LR_Immuno), y=LR_Immuno,fill=Cohort))+geom_bar(stat = "identity")+
    geom_errorbar(aes(ymin=LR_Immuno-se, ymax=LR_Immuno+se), colour="black", width=.5,                    # Width of the error bars
                  position=position_dodge(.9))+theme_pubr(base_size = 16)+rotate_x_text()+xlab("Subject ID")+ggtitle(VARIANT)
## Joining, by = "Experiment"
LR_IMMUNO_PLT_MIRA_BA2

saveRDS(LR_IMMUNO_PLT_MIRA_BA2,file="LR_IMMUNO_PLT_MIRA_BA2.rds")
VARIANT="BA4"
ERROR_DATASET <- summarySE(MIRA_DATASET %>% filter(Variant==VARIANT) %>% inner_join(LR_IMMUNO), measurevar="LR_Immuno", groupvars=c("Experiment"))
## Joining, by = c("Peptide", "Variant")
TCR_DT=ERROR_DATASET
LR_IMMUNO_PLT_MIRA_BA4=TCR_DT %>% inner_join(COHORT_LIST)%>%
  ggplot(aes(x=reorder(Experiment, -LR_Immuno), y=LR_Immuno,fill=Cohort))+geom_bar(stat = "identity")+
    geom_errorbar(aes(ymin=LR_Immuno-se, ymax=LR_Immuno+se), colour="black", width=.5,                    # Width of the error bars
                  position=position_dodge(.9))+theme_pubr(base_size = 16)+rotate_x_text()+xlab("Subject ID")+ggtitle(VARIANT)
## Joining, by = "Experiment"
LR_IMMUNO_PLT_MIRA_BA4

saveRDS(LR_IMMUNO_PLT_MIRA_BA4,file="LR_IMMUNO_PLT_MIRA_BA4.rds")
VARIANT="BA5"
ERROR_DATASET <- summarySE(MIRA_DATASET %>% filter(Variant==VARIANT) %>% inner_join(LR_IMMUNO), measurevar="LR_Immuno", groupvars=c("Experiment"))
## Joining, by = c("Peptide", "Variant")
TCR_DT=ERROR_DATASET
LR_IMMUNO_PLT_MIRA_BA5=TCR_DT %>% inner_join(COHORT_LIST)%>%
  ggplot(aes(x=reorder(Experiment, -LR_Immuno), y=LR_Immuno,fill=Cohort))+geom_bar(stat = "identity")+
    geom_errorbar(aes(ymin=LR_Immuno-se, ymax=LR_Immuno+se), colour="black", width=.5,                    # Width of the error bars
                  position=position_dodge(.9))+theme_pubr(base_size = 16)+rotate_x_text()+xlab("Subject ID")+ggtitle(VARIANT)
## Joining, by = "Experiment"
saveRDS(LR_IMMUNO_PLT_MIRA_BA5,file="LR_IMMUNO_PLT_MIRA_BA5.rds")